mirror of https://github.com/citusdata/citus.git
588 lines
16 KiB
C
588 lines
16 KiB
C
/*-------------------------------------------------------------------------
 *
 * statistics_collection.c
 *	  Anonymous reports and statistics collection.
 *
 * Copyright (c) 2017, Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "citus_version.h"
|
|
#include "fmgr.h"
|
|
#include "utils/uuid.h"
|
|
|
|
#if PG_VERSION_NUM >= 100000
|
|
#include "utils/backend_random.h"
|
|
#endif
|
|
|
|
bool EnableStatisticsCollection = true; /* send basic usage statistics to Citus */
|
|
|
|
PG_FUNCTION_INFO_V1(citus_server_id);
|
|
|
|
#ifdef HAVE_LIBCURL
|
|
|
|
#include <curl/curl.h>
|
|
#include <sys/utsname.h>
|
|
|
|
#include "access/xact.h"
|
|
#include "distributed/metadata_cache.h"
|
|
#include "distributed/multi_join_order.h"
|
|
#include "distributed/shardinterval_utils.h"
|
|
#include "distributed/statistics_collection.h"
|
|
#include "distributed/worker_manager.h"
|
|
#include "lib/stringinfo.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/json.h"
|
|
#include "utils/jsonb.h"
|
|
|
|
#if PG_VERSION_NUM >= 100000
|
|
#include "utils/fmgrprotos.h"
|
|
#endif
|
|
|
|
static size_t StatisticsCallback(char *contents, size_t size, size_t count,
|
|
void *userData);
|
|
static size_t CheckForUpdatesCallback(char *contents, size_t size, size_t count,
|
|
void *userData);
|
|
static bool JsonbFieldInt32(Jsonb *jsonb, const char *fieldName, int32 *result);
|
|
static bool JsonbFieldStr(Jsonb *jsonb, const char *fieldName, StringInfo result);
|
|
static uint64 NextPow2(uint64 n);
|
|
static uint64 DistributedTablesSize(List *distTableOids);
|
|
static bool UrlEncode(StringInfo buf, const char *str);
|
|
static bool SendHttpPostJsonRequest(const char *url, const char *postFields,
|
|
long timeoutSeconds,
|
|
curl_write_callback responseCallback);
|
|
static bool SendHttpGetJsonRequest(const char *url, long timeoutSeconds,
|
|
curl_write_callback responseCallback);
|
|
static bool PerformHttpRequest(CURL *curl);
|
|
|
|
|
|
/* WarnIfSyncDNS warns if libcurl is compiled with synchronous DNS. */
|
|
void
|
|
WarnIfSyncDNS(void)
|
|
{
|
|
curl_version_info_data *versionInfo = curl_version_info(CURLVERSION_NOW);
|
|
if (!(versionInfo->features & CURL_VERSION_ASYNCHDNS))
|
|
{
|
|
ereport(WARNING, (errmsg("your current libcurl version doesn't support "
|
|
"asynchronous DNS, which might cause unexpected "
|
|
"delays in the operation of Citus"),
|
|
errhint("Install a libcurl version with asynchronous DNS "
|
|
"support.")));
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* CollectBasicUsageStatistics sends basic usage statistics to Citus servers.
|
|
* This includes Citus version, table count rounded to next power of 2, cluster
|
|
* size rounded to next power of 2, worker node count, and uname data. Returns
|
|
* true if we actually have sent statistics to the server.
|
|
*/
|
|
bool
|
|
CollectBasicUsageStatistics(void)
|
|
{
|
|
List *distTableOids = NIL;
|
|
uint64 roundedDistTableCount = 0;
|
|
uint64 roundedClusterSize = 0;
|
|
uint32 workerNodeCount = 0;
|
|
StringInfo fields = makeStringInfo();
|
|
Datum metadataJsonbDatum = 0;
|
|
char *metadataJsonbStr = NULL;
|
|
MemoryContext savedContext = CurrentMemoryContext;
|
|
struct utsname unameData;
|
|
int unameResult PG_USED_FOR_ASSERTS_ONLY = 0;
|
|
bool metadataCollectionFailed = false;
|
|
memset(&unameData, 0, sizeof(unameData));
|
|
|
|
PG_TRY();
|
|
{
|
|
distTableOids = DistTableOidList();
|
|
roundedDistTableCount = NextPow2(list_length(distTableOids));
|
|
roundedClusterSize = NextPow2(DistributedTablesSize(distTableOids));
|
|
workerNodeCount = ActivePrimaryNodeCount();
|
|
metadataJsonbDatum = DistNodeMetadata();
|
|
metadataJsonbStr = DatumGetCString(DirectFunctionCall1(jsonb_out,
|
|
metadataJsonbDatum));
|
|
}
|
|
PG_CATCH();
|
|
{
|
|
ErrorData *edata = NULL;
|
|
MemoryContextSwitchTo(savedContext);
|
|
edata = CopyErrorData();
|
|
FlushErrorState();
|
|
|
|
/* rethrow as WARNING */
|
|
edata->elevel = WARNING;
|
|
ThrowErrorData(edata);
|
|
metadataCollectionFailed = true;
|
|
}
|
|
PG_END_TRY();
|
|
|
|
/*
|
|
* Returning here instead of in PG_CATCH() since PG_END_TRY() resets couple
|
|
* of global variables.
|
|
*/
|
|
if (metadataCollectionFailed)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
unameResult = uname(&unameData);
|
|
Assert(unameResult == 0); /* uname() always succeeds if we pass valid buffer */
|
|
|
|
appendStringInfoString(fields, "{\"citus_version\": ");
|
|
escape_json(fields, CITUS_VERSION);
|
|
appendStringInfo(fields, ",\"table_count\": " UINT64_FORMAT, roundedDistTableCount);
|
|
appendStringInfo(fields, ",\"cluster_size\": " UINT64_FORMAT, roundedClusterSize);
|
|
appendStringInfo(fields, ",\"worker_node_count\": %u", workerNodeCount);
|
|
appendStringInfoString(fields, ",\"os_name\": ");
|
|
escape_json(fields, unameData.sysname);
|
|
appendStringInfoString(fields, ",\"os_release\": ");
|
|
escape_json(fields, unameData.release);
|
|
appendStringInfoString(fields, ",\"hwid\": ");
|
|
escape_json(fields, unameData.machine);
|
|
appendStringInfo(fields, ",\"node_metadata\": %s", metadataJsonbStr);
|
|
appendStringInfoString(fields, "}");
|
|
|
|
return SendHttpPostJsonRequest(STATS_COLLECTION_HOST "/v1/usage_reports",
|
|
fields->data, HTTP_TIMEOUT_SECONDS,
|
|
StatisticsCallback);
|
|
}
|
|
|
|
|
|
/*
 * StatisticsCallback is the response handler for the request sent by
 * CollectBasicUsageStatistics. It does not look at the response body at all;
 * it simply returns the number of bytes it was handed (size * count).
 */
static size_t
StatisticsCallback(char *contents, size_t size, size_t count, void *userData)
{
	size_t receivedBytes = size * count;

	return receivedBytes;
}
|
|
|
|
|
|
/* CheckForUpdates queries Citus servers for newer releases of Citus. */
|
|
void
|
|
CheckForUpdates(void)
|
|
{
|
|
StringInfo url = makeStringInfo();
|
|
appendStringInfoString(url, STATS_COLLECTION_HOST "/v1/releases/latest?edition=");
|
|
|
|
if (!UrlEncode(url, CITUS_EDITION))
|
|
{
|
|
ereport(WARNING, (errmsg("url encoding '%s' failed", CITUS_EDITION)));
|
|
return;
|
|
}
|
|
|
|
if (!SendHttpGetJsonRequest(url->data, HTTP_TIMEOUT_SECONDS,
|
|
&CheckForUpdatesCallback))
|
|
{
|
|
ereport(WARNING, (errmsg("checking for updates failed")));
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* CheckForUpdatesCallback receives the response for the request sent by
|
|
* CheckForUpdates(). It processes the response, and if there is a newer release
|
|
* of Citus available, logs a LOG message. This function returns 0 if there are
|
|
* any errors in the received response, which means we didn't consume the data.
|
|
* Otherwise, it returns (size * count) which means we consumed all of the data.
|
|
*/
|
|
static size_t
|
|
CheckForUpdatesCallback(char *contents, size_t size, size_t count, void *userData)
|
|
{
|
|
const int32 citusVersionMajor = CITUS_VERSION_NUM / 10000;
|
|
const int32 citusVersionMinor = (CITUS_VERSION_NUM / 100) % 100;
|
|
const int32 citusVersionPatch = CITUS_VERSION_NUM % 100;
|
|
Jsonb *responseJsonb = NULL;
|
|
StringInfo releaseVersion = makeStringInfo();
|
|
int32 releaseMajor = 0;
|
|
int32 releaseMinor = 0;
|
|
int32 releasePatch = 0;
|
|
char *updateType = NULL;
|
|
MemoryContext savedContext = CurrentMemoryContext;
|
|
|
|
StringInfo responseNullTerminated = makeStringInfo();
|
|
appendBinaryStringInfo(responseNullTerminated, contents, size * count);
|
|
|
|
/* jsonb_in can throw errors */
|
|
PG_TRY();
|
|
{
|
|
Datum responseCStringDatum = CStringGetDatum(responseNullTerminated->data);
|
|
Datum responseJasonbDatum = DirectFunctionCall1(jsonb_in, responseCStringDatum);
|
|
responseJsonb = DatumGetJsonb(responseJasonbDatum);
|
|
}
|
|
PG_CATCH();
|
|
{
|
|
MemoryContextSwitchTo(savedContext);
|
|
FlushErrorState();
|
|
responseJsonb = NULL;
|
|
}
|
|
PG_END_TRY();
|
|
|
|
/*
|
|
* Returning here instead of in PG_CATCH() because PG_END_TRY() resets
|
|
* couple of global variables.
|
|
*/
|
|
if (responseJsonb == NULL)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
if (!JsonbFieldStr(responseJsonb, "version", releaseVersion) ||
|
|
!JsonbFieldInt32(responseJsonb, "major", &releaseMajor) ||
|
|
!JsonbFieldInt32(responseJsonb, "minor", &releaseMinor) ||
|
|
!JsonbFieldInt32(responseJsonb, "patch", &releasePatch))
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
if ((releaseMajor > citusVersionMajor) ||
|
|
(releaseMajor == citusVersionMajor && releaseMinor > citusVersionMinor))
|
|
{
|
|
updateType = "major";
|
|
}
|
|
else if (releaseMajor == citusVersionMajor &&
|
|
releaseMinor == citusVersionMinor &&
|
|
releasePatch > citusVersionPatch)
|
|
{
|
|
updateType = "patch";
|
|
}
|
|
|
|
if (updateType != NULL)
|
|
{
|
|
ereport(LOG, (errmsg("a new %s release of Citus (%s) is available",
|
|
updateType, releaseVersion->data)));
|
|
}
|
|
|
|
return size * count;
|
|
}
|
|
|
|
|
|
/*
|
|
* JsonbFieldInt32 sets the given output variable to the int32 value of the given
|
|
* field in the given JSONB object. If the field doesn't exist or its value is
|
|
* not an integer that fits in 32-bits, this function returns false.
|
|
*/
|
|
static bool
|
|
JsonbFieldInt32(Jsonb *jsonb, const char *fieldName, int32 *result)
|
|
{
|
|
MemoryContext savedContext = CurrentMemoryContext;
|
|
bool success = false;
|
|
JsonbValue *fieldValue = NULL;
|
|
JsonbValue key;
|
|
memset(&key, 0, sizeof(key));
|
|
key.type = jbvString;
|
|
key.val.string.len = strlen(fieldName);
|
|
key.val.string.val = (char *) fieldName;
|
|
|
|
fieldValue = findJsonbValueFromContainer(&(jsonb->root), JB_FOBJECT, &key);
|
|
if (fieldValue == NULL || fieldValue->type != jbvNumeric)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* numeric_int4 can throw errors */
|
|
PG_TRY();
|
|
{
|
|
Datum resultNumericDatum = NumericGetDatum(fieldValue->val.numeric);
|
|
*result = DatumGetInt32(DirectFunctionCall1(numeric_int4, resultNumericDatum));
|
|
success = true;
|
|
}
|
|
PG_CATCH();
|
|
{
|
|
MemoryContextSwitchTo(savedContext);
|
|
FlushErrorState();
|
|
success = false;
|
|
}
|
|
PG_END_TRY();
|
|
|
|
return success;
|
|
}
|
|
|
|
|
|
/*
|
|
* JsonbFieldStr appends string value of the given field in the given JSONB
|
|
* object to the given string buffer. If the field doesn't exist or its value is
|
|
* not string, this function returns false. Otherwise it returns true.
|
|
*/
|
|
static bool
|
|
JsonbFieldStr(Jsonb *jsonb, const char *fieldName, StringInfo result)
|
|
{
|
|
JsonbValue *fieldValue = NULL;
|
|
JsonbValue key;
|
|
memset(&key, 0, sizeof(key));
|
|
key.type = jbvString;
|
|
key.val.string.len = strlen(fieldName);
|
|
key.val.string.val = (char *) fieldName;
|
|
|
|
fieldValue = findJsonbValueFromContainer(&(jsonb->root), JB_FOBJECT, &key);
|
|
if (fieldValue == NULL || fieldValue->type != jbvString)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
appendBinaryStringInfo(result, fieldValue->val.string.val,
|
|
fieldValue->val.string.len);
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* DistributedTablesSize returns total size of data store in the cluster consisting
|
|
* of given distributed tables. We ignore tables which we cannot get their size.
|
|
*/
|
|
static uint64
|
|
DistributedTablesSize(List *distTableOids)
|
|
{
|
|
uint64 totalSize = 0;
|
|
ListCell *distTableOidCell = NULL;
|
|
|
|
foreach(distTableOidCell, distTableOids)
|
|
{
|
|
Oid relationId = lfirst_oid(distTableOidCell);
|
|
Datum tableSizeDatum = 0;
|
|
|
|
/*
|
|
* Relations can get dropped after getting the Oid list and before we
|
|
* reach here. Acquire a lock to make sure the relation is available
|
|
* while we are getting its size.
|
|
*/
|
|
Relation relation = try_relation_open(relationId, AccessShareLock);
|
|
if (relation == NULL)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Ignore hash partitioned tables with size greater than 1, since
|
|
* citus_table_size() doesn't work on them.
|
|
*/
|
|
if (PartitionMethod(relationId) == DISTRIBUTE_BY_HASH &&
|
|
!SingleReplicatedTable(relationId))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
tableSizeDatum = DirectFunctionCall1(citus_table_size,
|
|
ObjectIdGetDatum(relationId));
|
|
totalSize += DatumGetInt64(tableSizeDatum);
|
|
heap_close(relation, AccessShareLock);
|
|
}
|
|
|
|
return totalSize;
|
|
}
|
|
|
|
|
|
/*
 * NextPow2 returns the smallest power of 2 that is greater than or equal to
 * n. Two inputs are special-cased: 0 yields 0, and any n above 2^63 yields
 * 2^63 because no larger power of 2 fits in 64 bits.
 */
static uint64
NextPow2(uint64 n)
{
	const uint64 largestPow2 = (1ull << 63);
	uint64 candidate = 1;

	if (n == 0)
	{
		return 0;
	}

	/* there is no 64-bit power of 2 at or above n, so cap at 2^63 */
	if (n > largestPow2)
	{
		return largestPow2;
	}

	/* n <= 2^63 here, so doubling stops before it can overflow */
	for (; candidate < n; candidate <<= 1)
	{
	}

	return candidate;
}
|
|
|
|
|
|
/*
|
|
* UrlEncode URL encodes the given string and appends it to the given buffer.
|
|
* If either libcurl initialization or encoding fails, returns false.
|
|
*/
|
|
static bool
|
|
UrlEncode(StringInfo buf, const char *str)
|
|
{
|
|
bool success = false;
|
|
CURL *curl = NULL;
|
|
|
|
curl_global_init(CURL_GLOBAL_DEFAULT);
|
|
curl = curl_easy_init();
|
|
if (curl)
|
|
{
|
|
char *urlEncodedStr = curl_easy_escape(curl, str, strlen(str));
|
|
if (urlEncodedStr)
|
|
{
|
|
appendStringInfoString(buf, urlEncodedStr);
|
|
curl_free(urlEncodedStr);
|
|
success = true;
|
|
}
|
|
|
|
curl_easy_cleanup(curl);
|
|
}
|
|
|
|
curl_global_cleanup();
|
|
return success;
|
|
}
|
|
|
|
|
|
/*
|
|
* SendHttpPostJsonRequest sends a HTTP/HTTPS POST request to the given URL with
|
|
* the given json object. responseCallback is called with the content of response.
|
|
*/
|
|
static bool
|
|
SendHttpPostJsonRequest(const char *url, const char *jsonObj, long timeoutSeconds,
|
|
curl_write_callback responseCallback)
|
|
{
|
|
bool success = false;
|
|
CURL *curl = NULL;
|
|
|
|
curl_global_init(CURL_GLOBAL_DEFAULT);
|
|
curl = curl_easy_init();
|
|
if (curl)
|
|
{
|
|
struct curl_slist *headers = NULL;
|
|
headers = curl_slist_append(headers, "Accept: application/json");
|
|
headers = curl_slist_append(headers, "Content-Type: application/json");
|
|
headers = curl_slist_append(headers, "charsets: utf-8");
|
|
|
|
curl_easy_setopt(curl, CURLOPT_URL, url);
|
|
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, jsonObj);
|
|
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeoutSeconds);
|
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, responseCallback);
|
|
|
|
success = PerformHttpRequest(curl);
|
|
|
|
curl_slist_free_all(headers);
|
|
curl_easy_cleanup(curl);
|
|
}
|
|
|
|
curl_global_cleanup();
|
|
|
|
return success;
|
|
}
|
|
|
|
|
|
/*
|
|
* SendHttpGetJsonRequest sends an HTTP/HTTPS GET request to the given URL, and
|
|
* expects a JSON response from server. GET parameters should be added to the url.
|
|
* responseCallback is called with the content of response.
|
|
*/
|
|
static bool
|
|
SendHttpGetJsonRequest(const char *url, long timeoutSeconds,
|
|
curl_write_callback responseCallback)
|
|
{
|
|
bool success = false;
|
|
CURL *curl = NULL;
|
|
|
|
curl_global_init(CURL_GLOBAL_DEFAULT);
|
|
curl = curl_easy_init();
|
|
if (curl)
|
|
{
|
|
struct curl_slist *headers = NULL;
|
|
headers = curl_slist_append(headers, "Accept: application/json");
|
|
|
|
curl_easy_setopt(curl, CURLOPT_URL, url);
|
|
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeoutSeconds);
|
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, responseCallback);
|
|
|
|
success = PerformHttpRequest(curl);
|
|
|
|
curl_slist_free_all(headers);
|
|
curl_easy_cleanup(curl);
|
|
}
|
|
|
|
curl_global_cleanup();
|
|
|
|
return success;
|
|
}
|
|
|
|
|
|
/*
|
|
* PerformHttpRequest sends the HTTP request with the parameters set in the given
|
|
* curl object, and returns if it was successful or not. If the request was not
|
|
* successful, it may log some warnings. This method expects to take place after
|
|
* curl_easy_init() but before curl_easy_cleanup().
|
|
*/
|
|
static bool
|
|
PerformHttpRequest(CURL *curl)
|
|
{
|
|
bool success = false;
|
|
CURLcode curlCode = curl_easy_perform(curl);
|
|
if (curlCode == CURLE_OK)
|
|
{
|
|
int64 httpCode = 0;
|
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &httpCode);
|
|
if (httpCode == 200)
|
|
{
|
|
success = true;
|
|
}
|
|
else if (httpCode >= 400 && httpCode < 500)
|
|
{
|
|
ereport(WARNING, (errmsg("HTTP request failed."),
|
|
errhint("HTTP response code: " INT64_FORMAT,
|
|
httpCode)));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ereport(WARNING, (errmsg("Sending HTTP request failed."),
|
|
errhint("Error code: %s.", curl_easy_strerror(curlCode))));
|
|
}
|
|
|
|
return success;
|
|
}
|
|
|
|
|
|
#endif /* HAVE_LIBCURL */
|
|
|
|
/*
|
|
* citus_server_id returns a random UUID value as server identifier. This is
|
|
* modeled after PostgreSQL's pg_random_uuid().
|
|
*/
|
|
Datum
|
|
citus_server_id(PG_FUNCTION_ARGS)
|
|
{
|
|
uint8 *buf = (uint8 *) palloc(UUID_LEN);
|
|
|
|
#if PG_VERSION_NUM >= 100000
|
|
|
|
/*
|
|
* If pg_backend_random() fails, fall-back to using random(). In previous
|
|
* versions of postgres we don't have pg_backend_random(), so use it by
|
|
* default in that case.
|
|
*/
|
|
if (!pg_backend_random((char *) buf, UUID_LEN))
|
|
#endif
|
|
{
|
|
int bufIdx = 0;
|
|
for (bufIdx = 0; bufIdx < UUID_LEN; bufIdx++)
|
|
{
|
|
buf[bufIdx] = (uint8) (random() & 0xFF);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set magic numbers for a "version 4" (pseudorandom) UUID, see
|
|
* http://tools.ietf.org/html/rfc4122#section-4.4
|
|
*/
|
|
buf[6] = (buf[6] & 0x0f) | 0x40; /* "version" field */
|
|
buf[8] = (buf[8] & 0x3f) | 0x80; /* "variant" field */
|
|
|
|
PG_RETURN_UUID_P((pg_uuid_t *) buf);
|
|
}
|