From 6045e2323c712f067b26e503cdfce0a64471d34f Mon Sep 17 00:00:00 2001 From: Travis Fischer Date: Tue, 4 Jun 2024 07:17:55 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 9 ++++- pnpm-lock.yaml | 35 +++++++++++++++++ readme.md | 5 ++- src/url-utils.ts | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ src/utils.ts | 5 +++ 5 files changed, 149 insertions(+), 4 deletions(-) create mode 100644 src/url-utils.ts diff --git a/package.json b/package.json index 7728f8b..4684a17 100644 --- a/package.json +++ b/package.json @@ -66,9 +66,14 @@ "@nangohq/node": "^0.39.32", "dedent": "^1.5.3", "delay": "^6.0.0", + "hash-object": "^5.0.1", + "is-relative-url": "^4.0.0", "jsonrepair": "^3.6.1", "ky": "^1.2.4", + "normalize-url": "^8.0.1", + "p-map": "^7.0.2", "p-throttle": "^6.1.0", + "quick-lru": "^7.0.0", "twitter-api-sdk": "^1.2.1", "type-fest": "^4.18.3", "zod": "^3.23.3", @@ -105,8 +110,8 @@ "@dexaai/dexter": "^2.0.3", "@genkit-ai/ai": "^0.5.2", "@langchain/core": "^0.2.5", - "expr-eval": "^2.0.2", - "ai": "^3.1.22" + "ai": "^3.1.22", + "expr-eval": "^2.0.2" }, "peerDependenciesMeta": { "@dexaai/dexter": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a16bcc4..737a2c4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -20,15 +20,30 @@ importers: delay: specifier: ^6.0.0 version: 6.0.0 + hash-object: + specifier: ^5.0.1 + version: 5.0.1 + is-relative-url: + specifier: ^4.0.0 + version: 4.0.0 jsonrepair: specifier: ^3.6.1 version: 3.8.0 ky: specifier: ^1.2.4 version: 1.3.0 + normalize-url: + specifier: ^8.0.1 + version: 8.0.1 + p-map: + specifier: ^7.0.2 + version: 7.0.2 p-throttle: specifier: ^6.1.0 version: 6.1.0 + quick-lru: + specifier: ^7.0.0 + version: 7.0.0 twitter-api-sdk: specifier: ^1.2.1 version: 1.2.1 @@ -2320,6 +2335,10 @@ packages: resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==} engines: {node: '>= 0.10'} + is-absolute-url@4.0.1: + resolution: {integrity: sha512-/51/TKE88Lmm7Gc4/8btclNXWS+g50wXhYJq8HWIBAGUBnoAdRu1aXeh364t/O7wXDAcTJDP8PNuNKWUDWie+A==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + is-any-array@2.0.1: resolution: {integrity: sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==} @@ -2492,6 +2511,10 @@ packages: resolution: {integrity: sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg==} engines: {node: '>= 0.4'} + is-relative-url@4.0.0: + resolution: {integrity: sha512-PkzoL1qKAYXNFct5IKdKRH/iBQou/oCC85QhXj6WKtUQBliZ4Yfd3Zk27RHu9KQG8r6zgvAA2AQKC9p+rqTszg==} + engines: {node: '>=14.16'} + is-scoped@3.0.0: resolution: {integrity: sha512-ezxLUq30kiTvP0w/5n9tj4qTOKlrA07Oty1hwTQ+lcqw11x6uc8sp7VRb2OVGRzKfCHZ2A22T5Zsau/Q2Akb0g==} engines: {node: '>=12'} @@ -3529,6 +3552,10 @@ packages: resolution: {integrity: sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==} engines: {node: '>=10'} + quick-lru@7.0.0: + resolution: {integrity: sha512-MX8gB7cVYTrYcFfAnfLlhRd0+Toyl8yX8uBx1MrX7K0jegiz9TumwOK27ldXrgDlHRdVi+MqU9Ssw6dr4BNreg==} + engines: {node: '>=18'} + ramda@0.29.1: resolution: {integrity: sha512-OfxIeWzd4xdUNxlWhgFazxsA/nl3mS4/jGZI5n00uWOoSSFRhC1b6gl6xvmzUamgmqELraWp0J/qqVlXYPDPyA==} @@ -7005,6 +7032,8 @@ snapshots: ipaddr.js@1.9.1: {} + is-absolute-url@4.0.1: {} + is-any-array@2.0.1: {} is-array-buffer@3.0.4: @@ -7140,6 +7169,10 @@ snapshots: call-bind: 1.0.7 has-tostringtag: 1.0.2 + is-relative-url@4.0.0: + dependencies: + is-absolute-url: 4.0.1 + is-scoped@3.0.0: dependencies: scoped-regex: 3.0.0 @@ -8088,6 +8121,8 @@ snapshots: quick-lru@5.1.1: {} + quick-lru@7.0.0: {} + ramda@0.29.1: {} range-parser@1.2.1: {} diff --git a/readme.md b/readme.md index 70bdc30..2f8efd1 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@

- Agentic + Agentic

@@ -154,7 +154,6 @@ The SDK-specific imports are all isolated to keep the main `@agentic/stdlib` as - instructor-js - TODO - services - - calculator - e2b - search-and-scrape - replicate @@ -166,6 +165,8 @@ The SDK-specific imports are all isolated to keep the main `@agentic/stdlib` as - provide a converter for langchain `DynamicStructuredTool` - pull from other libs - pull from [nango](https://docs.nango.dev/integrations/overview) +- tools + - calculator - tools / chains / flows / runnables - market maps - https://github.com/causaly/zod-validation-error diff --git a/src/url-utils.ts b/src/url-utils.ts new file mode 100644 index 0000000..3fab95a --- /dev/null +++ b/src/url-utils.ts @@ -0,0 +1,99 @@ +import isRelativeUrlImpl from 'is-relative-url' +import normalizeUrlImpl, { type Options } from 'normalize-url' +import QuickLRU from 'quick-lru' + +import { hashObject } from './utils.js' + +const protocolAllowList = new Set(['https:', 'http:']) +const normalizedUrlCache = new QuickLRU({ + maxSize: 4000 +}) + +/** + * Checks if a URL is crawlable. + * + * @param url - URL string to check + * @returns whether the URL is crawlable + */ +export function isValidCrawlableUrl(url: string): boolean { + try { + if (!url || isRelativeUrl(url)) { + return false + } + + const parsedUrl = new URL(url) + if (!protocolAllowList.has(parsedUrl.protocol)) { + return false + } + + const normalizedUrl = normalizeUrl(url) + if (!normalizedUrl) { + return false + } + + return true + } catch { + return false + } +} + +export function isRelativeUrl(url: string): boolean { + if (!url || typeof url !== 'string') return false + + return isRelativeUrlImpl(url) && !url.startsWith('//') +} + +/** + * Normalizes a URL string. + * + * @param url - URL string to normalize + * @param options - options for normalization. + * @returns normalized URL string or null if an invalid URL was passed + */ +export function normalizeUrl( + url: string, + options?: Options +): string | undefined { + let normalizedUrl: string | undefined + let cacheKey: string | undefined + + try { + if (!url || isRelativeUrl(url)) { + return + } + + const opts = { + stripWWW: false, + defaultProtocol: 'https', + normalizeProtocol: true, + forceHttps: false, + stripHash: false, + stripTextFragment: true, + removeQueryParameters: [/^utm_\w+/i, 'ref', 'ref_src'], + removeTrailingSlash: true, + removeSingleSlash: true, + removeExplicitPort: true, + sortQueryParameters: true, + ...options + } as Required + + const optionsHash = hashObject(opts) + cacheKey = `${url}-${optionsHash}` + normalizedUrl = normalizedUrlCache.get(cacheKey) + + if (normalizedUrl !== undefined) { + return normalizedUrl + } + + normalizedUrl = normalizeUrlImpl(url, opts) + } catch { + // ignore invalid urls + normalizedUrl = undefined + } + + if (cacheKey) { + normalizedUrlCache.set(cacheKey, normalizedUrl!) + } + + return normalizedUrl +} diff --git a/src/utils.ts b/src/utils.ts index 10ae0b8..7d6d0d1 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,5 +1,6 @@ import type { Jsonifiable } from 'type-fest' import dedent from 'dedent' +import hashObjectImpl from 'hash-object' import type * as types from './types.js' @@ -140,3 +141,7 @@ const dedenter = dedent.withOptions({ escapeSpecialCharacters: true }) export function cleanStringForModel(text: string): string { return dedenter(text).trim() } + +export function hashObject(object: Record): string { + return hashObjectImpl(object, { algorithm: 'sha256' }) +}