diff options
author | Emiliano Ciavatta | 2020-09-15 21:17:06 +0000 |
---|---|---|
committer | Emiliano Ciavatta | 2020-09-15 21:17:06 +0000 |
commit | 2954045cb28ea8cbf4dbd019355a2df8fed28ccc (patch) | |
tree | 780a6f7644661699281f39653e3e0be7c64aa025 | |
parent | 4f70dbfb5519ae2a6e68869ecba0a9e4cfb3013b (diff) |
Refactor gzip decoder, add parsers with reproducers
-rw-r--r-- | connection_streams_controller.go | 59 | ||||
-rw-r--r-- | go.mod | 1 | ||||
-rw-r--r-- | go.sum | 2 | ||||
-rw-r--r-- | parsers/http_request_parser.go | 144 | ||||
-rw-r--r-- | parsers/http_response_parser.go | 72 | ||||
-rw-r--r-- | parsers/parser.go | 28 | ||||
-rw-r--r-- | parsers/parser_utils.go | 24 | ||||
-rw-r--r-- | utils.go | 82 |
8 files changed, 310 insertions, 102 deletions
diff --git a/connection_streams_controller.go b/connection_streams_controller.go index 096210e..3ba30f8 100644 --- a/connection_streams_controller.go +++ b/connection_streams_controller.go @@ -1,7 +1,9 @@ package main import ( + "bytes" "context" + "github.com/eciavatta/caronte/parsers" log "github.com/sirupsen/logrus" "time" ) @@ -25,13 +27,13 @@ type ConnectionStream struct { type PatternSlice [2]uint64 type Payload struct { - FromClient bool `json:"from_client"` - Content string `json:"content"` - DecodedContent string `json:"decoded_content"` - Index int `json:"index"` - Timestamp time.Time `json:"timestamp"` - IsRetransmitted bool `json:"is_retransmitted"` - RegexMatches []RegexSlice `json:"regex_matches"` + FromClient bool `json:"from_client"` + Content string `json:"content"` + Metadata parsers.Metadata `json:"metadata"` + Index int `json:"index"` + Timestamp time.Time `json:"timestamp"` + IsRetransmitted bool `json:"is_retransmitted"` + RegexMatches []RegexSlice `json:"regex_matches"` } type RegexSlice struct { @@ -56,8 +58,8 @@ func NewConnectionStreamsController(storage Storage) ConnectionStreamsController } func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, connectionID RowID, - format QueryFormat) []Payload { - payloads := make([]Payload, 0, InitialPayloadsSize) + format QueryFormat) []*Payload { + payloads := make([]*Payload, 0, InitialPayloadsSize) var clientIndex, serverIndex, globalIndex uint64 if format.Limit <= 0 { @@ -76,7 +78,11 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c return serverBlocksIndex < len(serverStream.BlocksIndexes) } - var payload Payload + var payload *Payload + payloadsBuffer := make([]*Payload, 0, 16) + contentChunkBuffer := new(bytes.Buffer) + var lastContentSlice []byte + var sideChanged, lastClient, lastServer bool for !clientStream.ID.IsZero() || !serverStream.ID.IsZero() { if hasClientBlocks() && (!hasServerBlocks() || // next payload is from client 
clientStream.BlocksTimestamps[clientBlocksIndex].UnixNano() <= @@ -90,10 +96,9 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c } size := uint64(end - start) - payload = Payload{ + payload = &Payload{ FromClient: true, Content: DecodeBytes(clientStream.Payload[start:end], format.Format), - //Request: ReadRequest(content), Index: start, Timestamp: clientStream.BlocksTimestamps[clientBlocksIndex], IsRetransmitted: clientStream.BlocksLoss[clientBlocksIndex], @@ -102,6 +107,9 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c clientIndex += size globalIndex += size clientBlocksIndex++ + + lastContentSlice = clientStream.Payload[start:end] + sideChanged, lastClient, lastServer = lastServer, true, false } else { // next payload is from server start := serverStream.BlocksIndexes[serverBlocksIndex] end := 0 @@ -112,15 +120,9 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c } size := uint64(end - start) - content := DecodeBytes(serverStream.Payload[start:end], format.Format) - - plainContent := DecodeBytes(serverStream.Payload[start:end], "default") - decodedContent := DecodeBytes([]byte(DecodeHttpResponse(plainContent)), format.Format) - - payload = Payload{ + payload = &Payload{ FromClient: false, - Content: content, - DecodedContent: decodedContent, + Content: DecodeBytes(serverStream.Payload[start:end], format.Format), Index: start, Timestamp: serverStream.BlocksTimestamps[serverBlocksIndex], IsRetransmitted: serverStream.BlocksLoss[serverBlocksIndex], @@ -129,12 +131,29 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c serverIndex += size globalIndex += size serverBlocksIndex++ + + lastContentSlice = serverStream.Payload[start:end] + sideChanged, lastClient, lastServer = lastClient, false, true + } + + if sideChanged { + metadata := parsers.Parse(contentChunkBuffer.Bytes()) + for _, elem := range payloadsBuffer { + elem.Metadata = 
metadata + } + + payloadsBuffer = payloadsBuffer[:0] + contentChunkBuffer.Reset() } + payloadsBuffer = append(payloadsBuffer, payload) + contentChunkBuffer.Write(lastContentSlice) if globalIndex > format.Skip { + // problem: waste of time if the payload is discarded payloads = append(payloads, payload) } if globalIndex > format.Skip+format.Limit { + // problem: the last chunk is not parsed, but can be ok because it is not finished return payloads } @@ -17,4 +17,5 @@ require ( go.mongodb.org/mongo-driver v1.3.1 golang.org/x/net v0.0.0-20190620200207-3b0461eec859 // indirect golang.org/x/sys v0.0.0-20200406155108-e3b113bbe6a4 // indirect + moul.io/http2curl v1.0.0 ) @@ -179,3 +179,5 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +moul.io/http2curl v1.0.0 h1:6XwpyZOYsgZJrU8exnG87ncVkU1FVCcTRpwzOkTDUi8= +moul.io/http2curl v1.0.0/go.mod h1:f6cULg+e4Md/oW1cYmwW4IWQOVl2lGbmCNGOHvzX2kE= diff --git a/parsers/http_request_parser.go b/parsers/http_request_parser.go new file mode 100644 index 0000000..d204d4c --- /dev/null +++ b/parsers/http_request_parser.go @@ -0,0 +1,144 @@ +package parsers + +import ( + "bufio" + "bytes" + "encoding/json" + "io/ioutil" + "moul.io/http2curl" + "net/http" + "strings" +) + +type HttpRequestMetadata struct { + BasicMetadata + Method string `json:"method"` + URL string `json:"url"` + Protocol string `json:"protocol"` + Host string `json:"host"` + Headers map[string]string `json:"headers"` + Cookies map[string]string `json:"cookies" binding:"omitempty"` + ContentLength int64 `json:"content_length"` + FormData map[string]string `json:"form_data" binding:"omitempty"` + Body string `json:"body" binding:"omitempty"` + Trailer map[string]string `json:"trailer" binding:"omitempty"` + 
Reproducers HttpRequestMetadataReproducers `json:"reproducers"` +} + +type HttpRequestMetadataReproducers struct { + CurlCommand string `json:"curl_command"` + RequestsCode string `json:"requests_code"` + FetchRequest string `json:"fetch_request"` +} + +type HttpRequestParser struct { +} + +func (p HttpRequestParser) TryParse(content []byte) Metadata { + reader := bufio.NewReader(bytes.NewReader(content)) + request, err := http.ReadRequest(reader) + if err != nil { + return nil + } + var body string + if request.Body != nil { + if buffer, err := ioutil.ReadAll(request.Body); err == nil { + body = string(buffer) + } + _ = request.Body.Close() + } + _ = request.ParseForm() + + return HttpRequestMetadata{ + BasicMetadata: BasicMetadata{"http-request"}, + Method: request.Method, + URL: request.URL.String(), + Protocol: request.Proto, + Host: request.Host, + Headers: JoinArrayMap(request.Header), + Cookies: CookiesMap(request.Cookies()), + ContentLength: request.ContentLength, + FormData: JoinArrayMap(request.Form), + Body: body, + Trailer: JoinArrayMap(request.Trailer), + Reproducers: HttpRequestMetadataReproducers{ + CurlCommand: curlCommand(request), + RequestsCode: requestsCode(request), + FetchRequest: fetchRequest(request, body), + }, + } +} + +func curlCommand(request *http.Request) string { + if command, err := http2curl.GetCurlCommand(request); err == nil { + return command.String() + } else { + return "invalid-request" + } +} + +func requestsCode(request *http.Request) string { + var b strings.Builder + var params string + if request.Form != nil { + params = toJson(JoinArrayMap(request.PostForm)) + } + headers := toJson(JoinArrayMap(request.Header)) + cookies := toJson(CookiesMap(request.Cookies())) + + b.WriteString("import requests\n\nresponse = requests." 
+ strings.ToLower(request.Method) + "(") + b.WriteString("\"" + request.URL.String() + "\"") + if params != "" { + b.WriteString(", data = " + params) + } + if headers != "" { + b.WriteString(", headers = " + headers) + } + if cookies != "" { + b.WriteString(", cookies = " + cookies) + } + b.WriteString(")\n") + b.WriteString(` +# print(response.url) +# print(response.text) +# print(response.content) +# print(response.json()) +# print(response.raw) +# print(response.status_code) +# print(response.cookies) +# print(response.history) +`) + + return b.String() +} + +func fetchRequest(request *http.Request, body string) string { + headers := JoinArrayMap(request.Header) + data := make(map[string]interface{}) + data["headers"] = headers + if referrer := request.Header.Get("referrer"); referrer != "" { + data["Referrer"] = referrer + } + // TODO: referrerPolicy + if body == "" { + data["body"] = nil + } else { + data["body"] = body + } + data["method"] = request.Method + // TODO: mode + + if jsonData := toJson(data); jsonData != "" { + return "fetch(\"" + request.URL.String() + "\", " + jsonData + ");" + } else { + return "invalid-request" + } +} + +func toJson(obj interface{}) string { + if buffer, err := json.Marshal(obj); err == nil { + return string(buffer) + } else { + return "" + } +} diff --git a/parsers/http_response_parser.go b/parsers/http_response_parser.go new file mode 100644 index 0000000..a639dec --- /dev/null +++ b/parsers/http_response_parser.go @@ -0,0 +1,72 @@ +package parsers + +import ( + "bufio" + "bytes" + "compress/gzip" + "io/ioutil" + "net/http" +) + +type HttpResponseMetadata struct { + BasicMetadata + Status string `json:"status"` + StatusCode int `json:"status_code"` + Protocol string `json:"protocol"` + Headers map[string]string `json:"headers"` + ConnectionClosed bool `json:"connection_closed"` + Cookies map[string]string `json:"cookies" binding:"omitempty"` + Location string `json:"location" binding:"omitempty"` + Compressed bool 
`json:"compressed"` + Body string `json:"body" binding:"omitempty"` + Trailer map[string]string `json:"trailer" binding:"omitempty"` +} + +type HttpResponseParser struct { +} + +func (p HttpResponseParser) TryParse(content []byte) Metadata { + reader := bufio.NewReader(bytes.NewReader(content)) + response, err := http.ReadResponse(reader, nil) + if err != nil { + return nil + } + var body string + var compressed bool + if response.Body != nil { + switch response.Header.Get("Content-Encoding") { + case "gzip": + if gzipReader, err := gzip.NewReader(response.Body); err == nil { + if buffer, err := ioutil.ReadAll(gzipReader); err == nil { + body = string(buffer) + compressed = true + } + _ = gzipReader.Close() + } + default: + if buffer, err := ioutil.ReadAll(response.Body); err == nil { + body = string(buffer) + } + } + _ = response.Body.Close() + } + + var location string + if locationUrl, err := response.Location(); err == nil { + location = locationUrl.String() + } + + return HttpResponseMetadata{ + BasicMetadata: BasicMetadata{"http-response"}, + Status: response.Status, + StatusCode: response.StatusCode, + Protocol: response.Proto, + Headers: JoinArrayMap(response.Header), + ConnectionClosed: response.Close, + Cookies: CookiesMap(response.Cookies()), + Location: location, + Compressed: compressed, + Body: body, + Trailer: JoinArrayMap(response.Trailer), + } +} diff --git a/parsers/parser.go b/parsers/parser.go new file mode 100644 index 0000000..06cc0dc --- /dev/null +++ b/parsers/parser.go @@ -0,0 +1,28 @@ +package parsers + +type Parser interface { + TryParse(content []byte) Metadata + +} + +type Metadata interface { +} + +type BasicMetadata struct { + Type string `json:"type"` +} + +var parsers = []Parser{ // order matter + HttpRequestParser{}, + HttpResponseParser{}, +} + +func Parse(content []byte) Metadata { + for _, parser := range parsers { + if metadata := parser.TryParse(content); metadata != nil { + return metadata + } + } + + return nil +} diff --git 
a/parsers/parser_utils.go b/parsers/parser_utils.go new file mode 100644 index 0000000..b688262 --- /dev/null +++ b/parsers/parser_utils.go @@ -0,0 +1,24 @@ +package parsers + +import ( + "net/http" + "strings" +) + +func JoinArrayMap(obj map[string][]string) map[string]string { + headers := make(map[string]string, len(obj)) + for key, value := range obj { + headers[key] = strings.Join(value, ";") + } + + return headers +} + +func CookiesMap(cookiesArray []*http.Cookie) map[string]string { + cookies := make(map[string]string, len(cookiesArray)) + for _, cookie := range cookiesArray { + cookies[cookie.Name] = cookie.Value + } + + return cookies +} @@ -13,11 +13,6 @@ import ( "net" "os" "time" - "net/http" - "bufio" - "strings" - "io/ioutil" - "compress/gzip" ) func Sha256Sum(fileName string) (string, error) { @@ -113,83 +108,6 @@ func DecodeBytes(buffer []byte, format string) string { } } -func ReadRequest(raw string) http.Request { - reader := bufio.NewReader(strings.NewReader(raw)) - req,err := http.ReadRequest(reader) - if err != nil{ - log.Info("Reading request: ",req) - return http.Request{} - } - return *req -} - -func GetHeader(raw string) string{ - tmp := strings.Split(raw,"\r\n") - end := len(tmp) - for i, line := range tmp{ - if line == ""{ - end = i - break - } - } - return strings.Join(tmp[:end],"\r\n") -} - -func GetBody(raw string) string{ - tmp := strings.Split(raw,"\r\n") - start := 0 - for i, line := range tmp{ - if line == ""{ - start = i + 2 - break - } - } - return strings.Join(tmp[start:],"\r\n") -} - -func DecodeHttpResponse(raw string) string { - body := []byte{} - reader := bufio.NewReader(strings.NewReader(raw)) - resp,err := http.ReadResponse(reader, &http.Request{}) - if err != nil{ - log.Info("Reading response: ",resp) - return "" - } - - defer resp.Body.Close() - - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - var bodyReader io.ReadCloser - switch resp.Header.Get("Content-Encoding") { - case "gzip": - bodyReader, err = 
gzip.NewReader(resp.Body) - if err != nil { - log.Error("Gunzipping body: ",err) - } - defer bodyReader.Close() - body, err = ioutil.ReadAll(bodyReader) - if err != nil{ - log.Error("Reading gzipped body: ",err) - // if the response is malformed - // or the connection is closed - fallbackReader, _ := gzip.NewReader(strings.NewReader(GetBody(raw))) - body, err = ioutil.ReadAll(fallbackReader) - if err != nil{ - log.Error(string(body)) - } - } - default: - bodyReader = resp.Body - body, err = ioutil.ReadAll(bodyReader) - if err != nil{ - log.Error("Reading body: ",err) - body = []byte(GetBody(raw)) - } - } - } - return GetHeader(raw) + "\r\n\r\n"+ string(body) -} - func CopyFile(dst, src string) error { in, err := os.Open(src) if err != nil { |