소스 검색

CSV CLI Tools

simon 9 달 전
부모
커밋
5c57ef5764

+ 4 - 0
.gitignore

@@ -146,3 +146,7 @@ bh_unicode_properties.cache
 # https://packagecontrol.io/packages/sublime-github
 GitHub.sublime-settings
 
+
+*.txt
+*.pprof
+

+ 53 - 0
rggo/performance/colStats/csv.go

@@ -0,0 +1,53 @@
+package main
+
+import (
+	"encoding/csv"
+	"fmt"
+	"io"
+	"strconv"
+)
+
+// statsFunc is the signature shared by the statistical operations in this
+// package: it receives a slice of float64 values and returns a single float64
+// result.
+type statsFunc func(data []float64) float64
+
+func sum(data []float64) float64 {
+	sum := 0.0
+	for _, v := range data {
+		sum += v
+	}
+	return sum
+}
+
+// avg returns the arithmetic mean of data: sum(data) divided by the number of
+// elements. For an empty slice the division is 0/0, so the result is NaN.
+func avg(data []float64) float64 {
+	total := sum(data)
+	count := float64(len(data))
+	return total / count
+}
+
+func csv2float(r io.Reader, column int) ([]float64, error) {
+	// Create the CSV Reader used to read in data from CSV files
+	cr := csv.NewReader(r)
+	// Adjusting for 0 based index
+	column--
+
+	// Read in all CSV data
+	allData, err := cr.ReadAll()
+	if err != nil {
+		return nil, fmt.Errorf("cannot read data from file: %w", err)
+	}
+
+	var data []float64
+
+	for i, row := range allData {
+		if i == 0 {
+			continue
+		}
+		if len(row) <= column {
+			return nil, fmt.Errorf("%w: file has only %d columns", ErrInvalidColumn, len(row))
+		}
+		v, err := strconv.ParseFloat(row[column], 64)
+		if err != nil {
+			return nil, fmt.Errorf("%w: %s", ErrNotNumber, err)
+		}
+		data = append(data, v)
+	}
+
+	return data, nil
+}

+ 104 - 0
rggo/performance/colStats/csv_test.go

@@ -0,0 +1,104 @@
+package main
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"testing"
+	"testing/iotest"
+)
+
+// TestOperations applies each statistics function to every sample data set
+// and compares the result with the expected value at the same index. Subtests
+// are named "<Op>Data<N>", where N is the 1-based data set number.
+func TestOperations(t *testing.T) {
+	samples := [][]float64{
+		{10, 20, 15, 30, 45, 50, 100, 30},
+		{5.5, 8, 2.2, 9.75, 8.45, 3, 2.5, 10.25, 4.75, 6.1, 7.67, 12.287, 5.47},
+		{-10, -20},
+		{102, 37, 44, 57, 67, 129},
+	}
+	cases := []struct {
+		name string
+		op   statsFunc
+		exp  []float64
+	}{
+		{name: "Sum", op: sum, exp: []float64{300, 85.927, -30, 436}},
+		{name: "Avg", op: avg, exp: []float64{37.5, 6.609769230769231, -15, 72.666666666666666}},
+	}
+
+	for _, c := range cases {
+		for idx := range c.exp {
+			want := c.exp[idx]
+			label := fmt.Sprintf("%sData%d", c.name, idx+1)
+
+			t.Run(label, func(t *testing.T) {
+				if got := c.op(samples[idx]); got != want {
+					t.Errorf("Expected %g, got %g instead", want, got)
+				}
+			})
+		}
+	}
+}
+
+// TestCSV2Float checks csv2float against an in-memory CSV document: successful
+// extraction of numeric columns, a failing reader, a non-numeric column and a
+// column number beyond the available fields.
+func TestCSV2Float(t *testing.T) {
+	csvData := `IP Address,Requests,Response Time 
+192.168.0.199,2056,236
+192.168.0.88,899,220
+192.168.0.199,3054,226
+192.168.0.100,4133,218
+192.168.0.199,950,238
+`
+	// Test cases for CSV2Float Test
+	testCases := []struct {
+		name   string
+		col    int
+		exp    []float64
+		expErr error
+		r      io.Reader
+	}{
+		{name: "Column2", col: 2,
+			exp: []float64{2056, 899, 3054, 4133, 950}, expErr: nil,
+			r: bytes.NewBufferString(csvData),
+		},
+		{name: "Column3", col: 3,
+			exp: []float64{236, 220, 226, 218, 238}, expErr: nil,
+			r: bytes.NewBufferString(csvData),
+		},
+		{name: "FailRead", col: 1,
+			exp:    nil,
+			expErr: iotest.ErrTimeout,
+			r:      iotest.TimeoutReader(bytes.NewReader([]byte{0})),
+		},
+		{name: "FailedNotNumber", col: 1,
+			exp:    nil,
+			expErr: ErrNotNumber,
+			r:      bytes.NewBufferString(csvData),
+		},
+		{name: "FailedInvalidColumn", col: 4,
+			exp:    nil,
+			expErr: ErrInvalidColumn,
+			r:      bytes.NewBufferString(csvData),
+		}}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			res, err := csv2float(tc.r, tc.col)
+
+			if tc.expErr != nil {
+				// Stop here on a nil error: the errors.Is check below would
+				// only add a second, garbled report for the same failure.
+				if err == nil {
+					t.Fatalf("Expected error %q. Got nil instead", tc.expErr)
+				}
+				if !errors.Is(err, tc.expErr) {
+					t.Errorf("Expected error %q, got %q instead", tc.expErr, err)
+				}
+				return
+			}
+
+			// res is meaningless after an error, so do not go on to index it.
+			if err != nil {
+				t.Fatalf("Unexpected error: %q", err)
+			}
+			// Guard the element comparison: a short result would panic on
+			// res[i], and a long one would otherwise go unnoticed.
+			if len(res) != len(tc.exp) {
+				t.Fatalf("Expected %d values, got %d instead", len(tc.exp), len(res))
+			}
+			for i, exp := range tc.exp {
+				if res[i] != exp {
+					t.Errorf("Expected %g, got %g instead", exp, res[i])
+				}
+			}
+		})
+	}
+}

+ 10 - 0
rggo/performance/colStats/errors.go

@@ -0,0 +1,10 @@
+package main
+
+import "errors"
+
+// Sentinel errors returned by this program. Callers compare against them with
+// errors.Is, since they are usually wrapped with extra detail.
+var (
+	// ErrNotNumber reports that a CSV field could not be parsed as a float.
+	ErrNotNumber        = errors.New("data is not numeric")
+	// ErrInvalidColumn reports a column number below 1 or beyond the number
+	// of fields in a record.
+	ErrInvalidColumn    = errors.New("invalid column number")
+	// ErrNoFiles reports that no input files were given.
+	ErrNoFiles          = errors.New("no input files")
+	// ErrInvalidOperation reports an operation name other than the supported
+	// ones.
+	ErrInvalidOperation = errors.New("invalid operation")
+)

+ 3 - 0
rggo/performance/colStats/go.mod

@@ -0,0 +1,3 @@
+module pragprog.com/rggo/performance/colStats
+
+go 1.23.5

+ 66 - 0
rggo/performance/colStats/main.go

@@ -0,0 +1,66 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"os"
+)
+
+// main parses the command-line flags and hands the remaining arguments (the
+// input file names) to run. Results go to standard output.
+//
+// Flags:
+//
+//	-op   operation to be executed (default "sum")
+//	-col  CSV column on which to execute the operation (default 1)
+//
+// When run fails, the error is printed to standard error and the process
+// exits with status 1.
+func main() {
+	op := flag.String("op", "sum", "Operation to be executed")
+	column := flag.Int("col", 1, "CSV column on which to execute operation")
+
+	flag.Parse()
+
+	if err := run(flag.Args(), *op, *column, os.Stdout); err != nil {
+		// Reporting is best effort: a failed write to stderr is deliberately
+		// ignored because there is nowhere left to report it, and it must not
+		// turn a failed run into a zero exit status.
+		_, _ = fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
+
+// run validates its arguments, reads the requested column from every file in
+// filenames, applies the chosen operation to the combined values and writes
+// the result followed by a newline to out.
+//
+// It returns ErrNoFiles when filenames is empty, ErrInvalidColumn (wrapped)
+// when column is less than 1, and ErrInvalidOperation (wrapped) when op is
+// neither "sum" nor "avg". Errors from opening, parsing or closing a file, and
+// from writing to out, are returned as well. Processing stops at the first
+// error, and nothing is written to out in that case.
+func run(filenames []string, op string, column int, out io.Writer) error {
+	var opFunc statsFunc
+
+	if len(filenames) == 0 {
+		return ErrNoFiles
+	}
+	if column < 1 {
+		return fmt.Errorf("%w: %d", ErrInvalidColumn, column)
+	}
+	switch op {
+	case "sum":
+		opFunc = sum
+	case "avg":
+		opFunc = avg
+	default:
+		return fmt.Errorf("%w: %s", ErrInvalidOperation, op)
+	}
+
+	var consolidate []float64
+	// Loop through all files adding their data to consolidate
+	for _, fname := range filenames {
+		// Open the file for reading
+		f, err := os.Open(fname)
+		if err != nil {
+			return fmt.Errorf("cannot open file: %w", err)
+		}
+		// Parse the CSV into a slice of float64 numbers
+		data, err := csv2float(f, column)
+		if err != nil {
+			// Release the descriptor before bailing out. The parse error is
+			// the one worth reporting, so a close failure here is dropped.
+			_ = f.Close()
+			return err
+		}
+		if err := f.Close(); err != nil {
+			return err
+		}
+		// Append the data to consolidate
+		consolidate = append(consolidate, data...)
+	}
+
+	_, err := fmt.Fprintln(out, opFunc(consolidate))
+
+	return err
+}

+ 70 - 0
rggo/performance/colStats/main_test.go

@@ -0,0 +1,70 @@
+// Author: simon
+// Author: ynwdlxm@163.com
+// Date: 2025/2/10 10:01
+// Desc:
+
+package main
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestRun drives run with fixture files under ./testdata and checks either the
+// text written to the output buffer or the error returned (matched with
+// errors.Is).
+func TestRun(t *testing.T) {
+	testCases := []struct {
+		name   string
+		col    int
+		op     string
+		exp    string
+		files  []string
+		expErr error
+	}{
+		{
+			name:  "RunAvg1File",
+			col:   3,
+			op:    "avg",
+			exp:   "227.6\n",
+			files: []string{"./testdata/example.csv"},
+		},
+		{
+			name:  "RunAvgMultiFiles",
+			col:   3,
+			op:    "avg",
+			exp:   "233.84\n",
+			files: []string{"./testdata/example.csv", "./testdata/example2.csv"},
+		},
+		{
+			name:   "RunFailedRead",
+			col:    2,
+			op:     "avg",
+			files:  []string{"./testdata/example.csv", "./testdata/fakefile.csv"},
+			expErr: os.ErrNotExist,
+		},
+		{
+			name:   "RunFailCol",
+			col:    0,
+			op:     "avg",
+			files:  []string{"./testdata/example.csv"},
+			expErr: ErrInvalidColumn,
+		},
+		{
+			name:   "RunFailNoFiles",
+			col:    2,
+			op:     "avg",
+			files:  []string{},
+			expErr: ErrNoFiles,
+		},
+		{
+			name:   "RunFailOperation",
+			col:    2,
+			op:     "invalid",
+			files:  []string{"./testdata/example.csv"},
+			expErr: ErrInvalidOperation,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			var out bytes.Buffer
+			err := run(tc.files, tc.op, tc.col, &out)
+
+			// Success path: no error expected, compare the produced text.
+			if tc.expErr == nil {
+				if err != nil {
+					t.Errorf("Unexpected error: %q", err)
+				}
+				if got := out.String(); got != tc.exp {
+					t.Errorf("Expected %q, got %q instead", tc.exp, got)
+				}
+				return
+			}
+
+			// Failure path: an error matching expErr must come back.
+			if err == nil {
+				t.Errorf("Expected error. Got nil instead")
+			}
+			if !errors.Is(err, tc.expErr) {
+				t.Errorf("Expected error %q, got %q instead", tc.expErr, err)
+			}
+		})
+	}
+}
+
+func BenchmarkRun(b *testing.B) {
+	filenames, err := filepath.Glob("./testdata/benchmark/*.csv")
+	if err != nil {
+		b.Fatal(err)
+	}
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		if err := run(filenames, "avg", 2, io.Discard); err != nil {
+			b.Error(err)
+		}
+	}
+}