cow code analysis


COW is an HTTP proxy that simplifies bypassing the Great Firewall. It tries to automatically identify blocked websites and uses the parent proxy only for those sites.

Features

Visited site recording

COW records all visited hosts and their visit counts in stat (a JSON file) located in the same directory as the config file.

How does COW detect blocked sites

A domain is considered blocked when one of the following errors occurs: server connection reset, connection to server timeout, or read from server timeout.

COW will retry the HTTP request upon these errors. But if some data has already been sent back to the client, the connection with the client will be dropped to signal the error.

Server connection reset is usually a reliable indicator of a blocked site, but timeout is not. COW re-estimates the timeout value every 30 seconds in order to avoid treating normal sites as blocked when network conditions are bad. Reverting to direct access two minutes after the first blockage is likewise meant to avoid such mistakes.

If automatic timeout retry causes problems for you, try changing readTimeout and dialTimeout in the configuration.

Example Configuration

# Line starting with # is comment and will be ignored
# Local proxy listen address
listen = http://127.0.0.1:7777

# SOCKS5 parent proxy
proxy = socks5://127.0.0.1:1080
# HTTP parent proxy
proxy = http://127.0.0.1:8080
proxy = http://user:password@127.0.0.1:8080
# shadowsocks parent proxy
proxy = ss://aes-128-cfb:password@1.2.3.4:8388
# cow parent proxy
proxy = cow://aes-128-cfb:password@1.2.3.4:8388

Implementation Design

COW uses separate goroutines to read client requests and server responses.

// main is the program entry point. It loads the configuration, starts one
// serving goroutine per configured listen proxy, waits for all of them to
// finish and, when relaunch is set, replaces the current process image with
// a fresh copy of the program.
func main() {
	quit = make(chan struct{})

	// Command line options are parsed first and handed to parseConfig, which
	// (per the original note) lets them override options from the config file.
	opts := parseCmdLineConfig()
	if opts.PrintVer {
		printVersion()
		os.Exit(0)
	}

	// Initialize listenProxy and backupParentPool.
	parseConfig(opts.RcFile, opts)
	initSelfListenAddr()
	initParentPool()

	if cores := config.Core; cores > 0 {
		runtime.GOMAXPROCS(cores)
	}

	// Handle signals for graceful termination.
	go sigHandler()

	// Every listener calls wg.Done when its Serve method returns, so Wait
	// unblocks only after all listeners have stopped.
	var wg sync.WaitGroup
	for _, lp := range listenProxy {
		wg.Add(1)
		go lp.Serve(&wg, quit)
	}
	wg.Wait()

	if relaunch {
		info.Println("Relunching cow...")
		// Re-exec ourselves: on success syscall.Exec never returns.
		self, err := lookPath()
		if err != nil {
			errl.Println(err)
			return
		}
		if err = syscall.Exec(self, os.Args, os.Environ()); err != nil {
			errl.Println(err)
		}
	}
	debug.Println("the main process is , exiting...")
}
// Serve listens on hp.addr and serves proxy requests from all clients until
// quit fires. Each accepted connection is handled in its own goroutine.
//
// wg.Done is called when Serve returns, whether because listening failed or
// because the listener was shut down.
//
// NOTE(review): main hands the same quit channel to every listener, so quit
// presumably gets closed rather than sent on — confirm against sigHandler.
func (hp *httpProxy) Serve(wg *sync.WaitGroup, quit <-chan struct{}) {
	defer wg.Done()

	ln, err := net.Listen("tcp", hp.addr)
	if err != nil {
		fmt.Println("listen http failed:", err)
		return
	}

	// stopped is closed before the listener, so once Accept is unblocked by
	// ln.Close the accept loop is guaranteed to observe the shutdown. Using a
	// channel instead of a shared bool avoids a data race between this
	// goroutine and the accept loop.
	stopped := make(chan struct{})
	go func() {
		<-quit
		close(stopped)
		ln.Close()
	}()
	exiting := func() bool {
		select {
		case <-stopped:
			return true
		default:
			return false
		}
	}

	// A parse error leaves host empty, which is treated like a wildcard
	// listen address below.
	host, _, _ := net.SplitHostPort(hp.addr)
	var pacURL string
	switch {
	case host == "" || host == "0.0.0.0":
		pacURL = fmt.Sprintf("http://<hostip>:%s/pac", hp.port)
	case hp.addrInPAC == "":
		pacURL = fmt.Sprintf("http://%s/pac", hp.addr)
	default:
		pacURL = fmt.Sprintf("http://%s/pac", hp.addrInPAC)
	}
	info.Printf("COW %s listen http %s, PAC url %s\n", version, hp.addr, pacURL)

	for {
		conn, err := ln.Accept()
		if exiting() {
			// Accept may have succeeded right before shutdown; close the
			// connection instead of leaking it.
			if err == nil {
				conn.Close()
			}
			debug.Println("exiting the http listner")
			return
		}
		if err != nil {
			errl.Printf("http proxy(%s) accept %v\n", ln.Addr(), err)
			if isErrTooManyOpenFd(err) {
				// Free file descriptors held by pooled server connections.
				connPool.CloseAll()
			}
			// Brief pause so a persistent accept error does not spin the CPU.
			time.Sleep(time.Millisecond)
			continue
		}
		// Start one goroutine per client connection.
		c := newClientConn(conn, hp)
		go c.serve()
	}
}
// serve handles all requests arriving on one client connection. It runs in
// its own goroutine and loops: parse a request, obtain a server connection,
// forward the request and relay the response. The loop ends when the request
// cannot be parsed, an unrecoverable error occurs, or the client request does
// not ask for keep-alive. The request buffer is released and the client
// connection closed on return.
func (c *clientConn) serve() {
	var r Request
	var rp Response
	var sv *serverConn
	var err error

	defer func() {
		r.releaseBuf()
		c.Close()
	}()

	// Refer to implementation.md for the design choices on parsing the request
	// and response.
	for {
		// Parse the next request from the client.
		if err = parseRequest(c, &r); err != nil {
			debug.Printf("cli(%s) parse request %v\n", c.RemoteAddr(), err)
			if err == io.EOF || isErrConnReset(err) {
				// Client went away; nobody to send an error page to.
				return
			}
			if err != errClientTimeout {
				// A malformed request is a client error: 400, not 404.
				sendErrorPage(c, "400 Bad request", "Bad request", err.Error())
				return
			}
			sendErrorPage(c, statusRequestTimeout, statusRequestTimeout,
				"Your browser didn't send a complete request in time.")
			return
		}

	retry:
		// Build the server connection based on the parsed request.
		if sv, err = c.getServerConn(&r); err != nil {
			if debug {
				debug.Printf("cli(%s) failed to get server conn %v\n", c.RemoteAddr(), &r)
			}
			// Failed connection will send error page back to the client.
			// For CONNECT, the client read buffer is released in copyClient2Server,
			// so can't go back to getRequest.
			if err == errPageSent && !r.isConnect {
				if r.hasBody() {
					// Drain the request body so the next request can be parsed.
					debug.Printf("cli(%s) skip request body %v\n", c.RemoteAddr(), &r)
					sendBody(SinkWriter{}, c.bufRd, int(r.ContLen), r.Chunking)
				}
				continue
			}
			return
		}

		// Forward the request to the server and the response back to the client.
		if err = sv.doRequest(c, &r, &rp); err != nil {
			sv.Close()
			if c.shouldRetry(&r, sv, err) {
				goto retry
			}
			if err == errPageSent && (!r.hasBody() || r.hasSent()) {
				// Can only continue if request has no body, or request body
				// has been read.
				continue
			}
			return
		}

		// Put server connection to pool, so other clients can use it.
		_, isCowConn := sv.Conn.(cowConn)
		if rp.ConnectionKeepAlive || isCowConn {
			if debug {
				debug.Printf("cli(%s) connPool put %s", c.RemoteAddr(), sv.hostPort)
			}
			// If the server connection is not going to be used soon,
			// release buffer before putting back to pool can save memory.
			sv.releaseBuf()
			connPool.Put(sv)
		} else {
			if debug {
				debug.Printf("cli(%s) server %s close conn\n", c.RemoteAddr(), sv.hostPort)
			}
			sv.Close()
		}

		if !r.ConnectionKeepAlive {
			if debug {
				debug.Printf("cli(%s) close connection\n", c.RemoteAddr())
			}
			return
		}
	}
}
// connect opens a connection for request r, choosing between direct access
// and the parent proxy:
//
//   - with config.AlwaysProxy only the parent proxy is tried;
//   - for a site treated as blocked (and a non-empty parent proxy set) the
//     parent proxy is tried first, then a direct connection;
//   - otherwise a direct connection is tried first, then the parent proxy,
//     and a successful fallback reports the request as blocked.
//
// When every attempt fails an error page is sent to the client and
// errPageSent is returned with a nil connection.
func (c *clientConn) connect(r *Request, siteInfo *VisitCnt) (srvconn net.Conn, err error) {
	var reason string

	switch {
	case config.AlwaysProxy:
		if srvconn, err = parentProxy.connect(r.URL); err == nil {
			return srvconn, nil
		}
		reason = genErrMsg(r, nil, "Parent proxy connection failed, always use parent proxy.")

	case siteInfo.AsBlocked() && !parentProxy.empty():
		// Parent proxy first; if that fails, fall back to a direct connection.
		if srvconn, err = parentProxy.connect(r.URL); err == nil {
			return srvconn, nil
		}
		if srvconn, err = connectDirect(r.URL, siteInfo); err == nil {
			return srvconn, nil
		}
		reason = genErrMsg(r, nil, "Parent proxy and direct connection failed, maybe blocked site.")

	default:
		// Direct connection first; if that fails, fall back to the parent proxy.
		if srvconn, err = connectDirect(r.URL, siteInfo); err == nil {
			return srvconn, nil
		}
		// net.Dial does two things: DNS lookup and TCP connection.
		// GFW may cause failure here: make it time out or reset connection.
		//
		// RST during TCP handshake is valid and would return as connection
		// refused error. The original author's observation is that GFW does
		// not use RST to stop the TCP handshake. To keep things simple and
		// guard against that observation being wrong, always try the parent
		// proxy on a dial error.
		var proxyErr error
		if srvconn, proxyErr = parentProxy.connect(r.URL); proxyErr == nil {
			// The direct error (err) is what gets reported for the blocked site.
			c.handleBlockedRequest(r, err)
			if debug {
				debug.Printf("cli(%s) direct connection failed, use parent proxy for %v\n",
					c.RemoteAddr(), r)
			}
			return srvconn, nil
		}
		reason = genErrMsg(r, nil,
			"Direct and parent proxy connection failed, maybe blocked site.")
	}

	// Every path reaching this point has a non-nil err from its last
	// connection attempt stored in err.
	sendErrorPage(c, "504 Connection failed", err.Error(), reason)
	return nil, errPageSent
}