aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEmiliano Ciavatta2020-09-15 21:17:06 +0000
committerEmiliano Ciavatta2020-09-15 21:17:06 +0000
commit2954045cb28ea8cbf4dbd019355a2df8fed28ccc (patch)
tree780a6f7644661699281f39653e3e0be7c64aa025
parent4f70dbfb5519ae2a6e68869ecba0a9e4cfb3013b (diff)
Refactor gzip decoder, added parsers with reproducers
-rw-r--r--connection_streams_controller.go59
-rw-r--r--go.mod1
-rw-r--r--go.sum2
-rw-r--r--parsers/http_request_parser.go144
-rw-r--r--parsers/http_response_parser.go72
-rw-r--r--parsers/parser.go28
-rw-r--r--parsers/parser_utils.go24
-rw-r--r--utils.go82
8 files changed, 310 insertions, 102 deletions
diff --git a/connection_streams_controller.go b/connection_streams_controller.go
index 096210e..3ba30f8 100644
--- a/connection_streams_controller.go
+++ b/connection_streams_controller.go
@@ -1,7 +1,9 @@
package main
import (
+ "bytes"
"context"
+ "github.com/eciavatta/caronte/parsers"
log "github.com/sirupsen/logrus"
"time"
)
@@ -25,13 +27,13 @@ type ConnectionStream struct {
type PatternSlice [2]uint64
type Payload struct {
- FromClient bool `json:"from_client"`
- Content string `json:"content"`
- DecodedContent string `json:"decoded_content"`
- Index int `json:"index"`
- Timestamp time.Time `json:"timestamp"`
- IsRetransmitted bool `json:"is_retransmitted"`
- RegexMatches []RegexSlice `json:"regex_matches"`
+ FromClient bool `json:"from_client"`
+ Content string `json:"content"`
+ Metadata parsers.Metadata `json:"metadata"`
+ Index int `json:"index"`
+ Timestamp time.Time `json:"timestamp"`
+ IsRetransmitted bool `json:"is_retransmitted"`
+ RegexMatches []RegexSlice `json:"regex_matches"`
}
type RegexSlice struct {
@@ -56,8 +58,8 @@ func NewConnectionStreamsController(storage Storage) ConnectionStreamsController
}
func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, connectionID RowID,
- format QueryFormat) []Payload {
- payloads := make([]Payload, 0, InitialPayloadsSize)
+ format QueryFormat) []*Payload {
+ payloads := make([]*Payload, 0, InitialPayloadsSize)
var clientIndex, serverIndex, globalIndex uint64
if format.Limit <= 0 {
@@ -76,7 +78,11 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c
return serverBlocksIndex < len(serverStream.BlocksIndexes)
}
- var payload Payload
+ var payload *Payload
+ payloadsBuffer := make([]*Payload, 0, 16)
+ contentChunkBuffer := new(bytes.Buffer)
+ var lastContentSlice []byte
+ var sideChanged, lastClient, lastServer bool
for !clientStream.ID.IsZero() || !serverStream.ID.IsZero() {
if hasClientBlocks() && (!hasServerBlocks() || // next payload is from client
clientStream.BlocksTimestamps[clientBlocksIndex].UnixNano() <=
@@ -90,10 +96,9 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c
}
size := uint64(end - start)
- payload = Payload{
+ payload = &Payload{
FromClient: true,
Content: DecodeBytes(clientStream.Payload[start:end], format.Format),
- //Request: ReadRequest(content),
Index: start,
Timestamp: clientStream.BlocksTimestamps[clientBlocksIndex],
IsRetransmitted: clientStream.BlocksLoss[clientBlocksIndex],
@@ -102,6 +107,9 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c
clientIndex += size
globalIndex += size
clientBlocksIndex++
+
+ lastContentSlice = clientStream.Payload[start:end]
+ sideChanged, lastClient, lastServer = lastServer, true, false
} else { // next payload is from server
start := serverStream.BlocksIndexes[serverBlocksIndex]
end := 0
@@ -112,15 +120,9 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c
}
size := uint64(end - start)
- content := DecodeBytes(serverStream.Payload[start:end], format.Format)
-
- plainContent := DecodeBytes(serverStream.Payload[start:end], "default")
- decodedContent := DecodeBytes([]byte(DecodeHttpResponse(plainContent)), format.Format)
-
- payload = Payload{
+ payload = &Payload{
FromClient: false,
- Content: content,
- DecodedContent: decodedContent,
+ Content: DecodeBytes(serverStream.Payload[start:end], format.Format),
Index: start,
Timestamp: serverStream.BlocksTimestamps[serverBlocksIndex],
IsRetransmitted: serverStream.BlocksLoss[serverBlocksIndex],
@@ -129,12 +131,29 @@ func (csc ConnectionStreamsController) GetConnectionPayload(c context.Context, c
serverIndex += size
globalIndex += size
serverBlocksIndex++
+
+ lastContentSlice = serverStream.Payload[start:end]
+ sideChanged, lastClient, lastServer = lastClient, false, true
+ }
+
+ if sideChanged {
+ metadata := parsers.Parse(contentChunkBuffer.Bytes())
+ for _, elem := range payloadsBuffer {
+ elem.Metadata = metadata
+ }
+
+ payloadsBuffer = payloadsBuffer[:0]
+ contentChunkBuffer.Reset()
}
+ payloadsBuffer = append(payloadsBuffer, payload)
+ contentChunkBuffer.Write(lastContentSlice)
if globalIndex > format.Skip {
+ // problem: time is wasted parsing the chunk when the payload is later discarded
payloads = append(payloads, payload)
}
if globalIndex > format.Skip+format.Limit {
+ // problem: the last chunk is not parsed, but that can be acceptable because it is not finished yet
return payloads
}
diff --git a/go.mod b/go.mod
index 1281ae8..308b16b 100644
--- a/go.mod
+++ b/go.mod
@@ -17,4 +17,5 @@ require (
go.mongodb.org/mongo-driver v1.3.1
golang.org/x/net v0.0.0-20190620200207-3b0461eec859 // indirect
golang.org/x/sys v0.0.0-20200406155108-e3b113bbe6a4 // indirect
+ moul.io/http2curl v1.0.0
)
diff --git a/go.sum b/go.sum
index d17dea6..fd63c39 100644
--- a/go.sum
+++ b/go.sum
@@ -179,3 +179,5 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+moul.io/http2curl v1.0.0 h1:6XwpyZOYsgZJrU8exnG87ncVkU1FVCcTRpwzOkTDUi8=
+moul.io/http2curl v1.0.0/go.mod h1:f6cULg+e4Md/oW1cYmwW4IWQOVl2lGbmCNGOHvzX2kE=
diff --git a/parsers/http_request_parser.go b/parsers/http_request_parser.go
new file mode 100644
index 0000000..d204d4c
--- /dev/null
+++ b/parsers/http_request_parser.go
@@ -0,0 +1,144 @@
+package parsers
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/json"
+ "io/ioutil"
+ "moul.io/http2curl"
+ "net/http"
+ "strings"
+)
+
+type HttpRequestMetadata struct {
+ BasicMetadata
+ Method string `json:"method"`
+ URL string `json:"url"`
+ Protocol string `json:"protocol"`
+ Host string `json:"host"`
+ Headers map[string]string `json:"headers"`
+ Cookies map[string]string `json:"cookies" binding:"omitempty"`
+ ContentLength int64 `json:"content_length"`
+ FormData map[string]string `json:"form_data" binding:"omitempty"`
+ Body string `json:"body" binding:"omitempty"`
+ Trailer map[string]string `json:"trailer" binding:"omitempty"`
+ Reproducers HttpRequestMetadataReproducers `json:"reproducers"`
+}
+
+type HttpRequestMetadataReproducers struct {
+ CurlCommand string `json:"curl_command"`
+ RequestsCode string `json:"requests_code"`
+ FetchRequest string `json:"fetch_request"`
+}
+
+type HttpRequestParser struct {
+}
+
+func (p HttpRequestParser) TryParse(content []byte) Metadata {
+ reader := bufio.NewReader(bytes.NewReader(content))
+ request, err := http.ReadRequest(reader)
+ if err != nil {
+ return nil
+ }
+ var body string
+ if request.Body != nil {
+ if buffer, err := ioutil.ReadAll(request.Body); err == nil {
+ body = string(buffer)
+ }
+ _ = request.Body.Close()
+ }
+ _ = request.ParseForm()
+
+ return HttpRequestMetadata{
+ BasicMetadata: BasicMetadata{"http-request"},
+ Method: request.Method,
+ URL: request.URL.String(),
+ Protocol: request.Proto,
+ Host: request.Host,
+ Headers: JoinArrayMap(request.Header),
+ Cookies: CookiesMap(request.Cookies()),
+ ContentLength: request.ContentLength,
+ FormData: JoinArrayMap(request.Form),
+ Body: body,
+ Trailer: JoinArrayMap(request.Trailer),
+ Reproducers: HttpRequestMetadataReproducers{
+ CurlCommand: curlCommand(request),
+ RequestsCode: requestsCode(request),
+ FetchRequest: fetchRequest(request, body),
+ },
+ }
+}
+
+func curlCommand(request *http.Request) string {
+ if command, err := http2curl.GetCurlCommand(request); err == nil {
+ return command.String()
+ } else {
+ return "invalid-request"
+ }
+}
+
+func requestsCode(request *http.Request) string {
+ var b strings.Builder
+ var params string
+ if request.Form != nil {
+ params = toJson(JoinArrayMap(request.PostForm))
+ }
+ headers := toJson(JoinArrayMap(request.Header))
+ cookies := toJson(CookiesMap(request.Cookies()))
+
+ b.WriteString("import requests\n\nresponse = requests." + strings.ToLower(request.Method) + "(")
+ b.WriteString("\"" + request.URL.String() + "\"")
+ if params != "" {
+ b.WriteString(", data = " + params)
+ }
+ if headers != "" {
+ b.WriteString(", headers = " + headers)
+ }
+ if cookies != "" {
+ b.WriteString(", cookies = " + cookies)
+ }
+ b.WriteString(")\n")
+ b.WriteString(`
+# print(response.url)
+# print(response.text)
+# print(response.content)
+# print(response.json())
+# print(response.raw)
+# print(response.status_code)
+# print(response.cookies)
+# print(response.history)
+`)
+
+ return b.String()
+}
+
+func fetchRequest(request *http.Request, body string) string {
+ headers := JoinArrayMap(request.Header)
+ data := make(map[string]interface{})
+ data["headers"] = headers
+ if referrer := request.Header.Get("referrer"); referrer != "" {
+ data["Referrer"] = referrer
+ }
+ // TODO: referrerPolicy
+ if body == "" {
+ data["body"] = nil
+ } else {
+ data["body"] = body
+ }
+ data["method"] = request.Method
+ // TODO: mode
+
+ if jsonData := toJson(data); jsonData != "" {
+ return "fetch(\"" + request.URL.String() + "\", " + jsonData + ");"
+ } else {
+ return "invalid-request"
+ }
+}
+
+func toJson(obj interface{}) string {
+ if buffer, err := json.Marshal(obj); err == nil {
+ return string(buffer)
+ } else {
+ return ""
+ }
+}
diff --git a/parsers/http_response_parser.go b/parsers/http_response_parser.go
new file mode 100644
index 0000000..a639dec
--- /dev/null
+++ b/parsers/http_response_parser.go
@@ -0,0 +1,72 @@
+package parsers
+
+import (
+ "bufio"
+ "bytes"
+ "compress/gzip"
+ "io/ioutil"
+ "net/http"
+)
+
+type HttpResponseMetadata struct {
+ BasicMetadata
+ Status string `json:"status"`
+ StatusCode int `json:"status_code"`
+ Protocol string `json:"protocol"`
+ Headers map[string]string `json:"headers"`
+ ConnectionClosed bool `json:"connection_closed"`
+ Cookies map[string]string `json:"cookies" binding:"omitempty"`
+ Location string `json:"location" binding:"omitempty"`
+ Compressed bool `json:"compressed"`
+ Body string `json:"body" binding:"omitempty"`
+ Trailer map[string]string `json:"trailer" binding:"omitempty"`
+}
+
+type HttpResponseParser struct {
+}
+
+func (p HttpResponseParser) TryParse(content []byte) Metadata {
+ reader := bufio.NewReader(bytes.NewReader(content))
+ response, err := http.ReadResponse(reader, nil)
+ if err != nil {
+ return nil
+ }
+ var body string
+ var compressed bool
+ if response.Body != nil {
+ switch response.Header.Get("Content-Encoding") {
+ case "gzip":
+ if gzipReader, err := gzip.NewReader(response.Body); err == nil {
+ if buffer, err := ioutil.ReadAll(gzipReader); err == nil {
+ body = string(buffer)
+ compressed = true
+ }
+ _ = gzipReader.Close()
+ }
+ default:
+ if buffer, err := ioutil.ReadAll(response.Body); err == nil {
+ body = string(buffer)
+ }
+ }
+ _ = response.Body.Close()
+ }
+
+ var location string
+ if locationUrl, err := response.Location(); err == nil {
+ location = locationUrl.String()
+ }
+
+ return HttpResponseMetadata{
+ BasicMetadata: BasicMetadata{"http-response"},
+ Status: response.Status,
+ StatusCode: response.StatusCode,
+ Protocol: response.Proto,
+ Headers: JoinArrayMap(response.Header),
+ ConnectionClosed: response.Close,
+ Cookies: CookiesMap(response.Cookies()),
+ Location: location,
+ Compressed: compressed,
+ Body: body,
+ Trailer: JoinArrayMap(response.Trailer),
+ }
+}
diff --git a/parsers/parser.go b/parsers/parser.go
new file mode 100644
index 0000000..06cc0dc
--- /dev/null
+++ b/parsers/parser.go
@@ -0,0 +1,28 @@
+package parsers
+
+type Parser interface {
+ TryParse(content []byte) Metadata
+
+}
+
+type Metadata interface {
+}
+
+type BasicMetadata struct {
+ Type string `json:"type"`
+}
+
+var parsers = []Parser{ // order matters
+ HttpRequestParser{},
+ HttpResponseParser{},
+}
+
+func Parse(content []byte) Metadata {
+ for _, parser := range parsers {
+ if metadata := parser.TryParse(content); metadata != nil {
+ return metadata
+ }
+ }
+
+ return nil
+}
diff --git a/parsers/parser_utils.go b/parsers/parser_utils.go
new file mode 100644
index 0000000..b688262
--- /dev/null
+++ b/parsers/parser_utils.go
@@ -0,0 +1,24 @@
+package parsers
+
+import (
+ "net/http"
+ "strings"
+)
+
+func JoinArrayMap(obj map[string][]string) map[string]string {
+ headers := make(map[string]string, len(obj))
+ for key, value := range obj {
+ headers[key] = strings.Join(value, ";")
+ }
+
+ return headers
+}
+
+func CookiesMap(cookiesArray []*http.Cookie) map[string]string {
+ cookies := make(map[string]string, len(cookiesArray))
+ for _, cookie := range cookiesArray {
+ cookies[cookie.Name] = cookie.Value
+ }
+
+ return cookies
+}
diff --git a/utils.go b/utils.go
index b07244d..a14fdca 100644
--- a/utils.go
+++ b/utils.go
@@ -13,11 +13,6 @@ import (
"net"
"os"
"time"
- "net/http"
- "bufio"
- "strings"
- "io/ioutil"
- "compress/gzip"
)
func Sha256Sum(fileName string) (string, error) {
@@ -113,83 +108,6 @@ func DecodeBytes(buffer []byte, format string) string {
}
}
-func ReadRequest(raw string) http.Request {
- reader := bufio.NewReader(strings.NewReader(raw))
- req,err := http.ReadRequest(reader)
- if err != nil{
- log.Info("Reading request: ",req)
- return http.Request{}
- }
- return *req
-}
-
-func GetHeader(raw string) string{
- tmp := strings.Split(raw,"\r\n")
- end := len(tmp)
- for i, line := range tmp{
- if line == ""{
- end = i
- break
- }
- }
- return strings.Join(tmp[:end],"\r\n")
-}
-
-func GetBody(raw string) string{
- tmp := strings.Split(raw,"\r\n")
- start := 0
- for i, line := range tmp{
- if line == ""{
- start = i + 2
- break
- }
- }
- return strings.Join(tmp[start:],"\r\n")
-}
-
-func DecodeHttpResponse(raw string) string {
- body := []byte{}
- reader := bufio.NewReader(strings.NewReader(raw))
- resp,err := http.ReadResponse(reader, &http.Request{})
- if err != nil{
- log.Info("Reading response: ",resp)
- return ""
- }
-
- defer resp.Body.Close()
-
- if resp.StatusCode >= 200 && resp.StatusCode < 300 {
- var bodyReader io.ReadCloser
- switch resp.Header.Get("Content-Encoding") {
- case "gzip":
- bodyReader, err = gzip.NewReader(resp.Body)
- if err != nil {
- log.Error("Gunzipping body: ",err)
- }
- defer bodyReader.Close()
- body, err = ioutil.ReadAll(bodyReader)
- if err != nil{
- log.Error("Reading gzipped body: ",err)
- // if the response is malformed
- // or the connection is closed
- fallbackReader, _ := gzip.NewReader(strings.NewReader(GetBody(raw)))
- body, err = ioutil.ReadAll(fallbackReader)
- if err != nil{
- log.Error(string(body))
- }
- }
- default:
- bodyReader = resp.Body
- body, err = ioutil.ReadAll(bodyReader)
- if err != nil{
- log.Error("Reading body: ",err)
- body = []byte(GetBody(raw))
- }
- }
- }
- return GetHeader(raw) + "\r\n\r\n"+ string(body)
-}
-
func CopyFile(dst, src string) error {
in, err := os.Open(src)
if err != nil {